package bench;

import java.util.concurrent.atomic.AtomicInteger;

public class FannkuchRedux {
	static final AtomicInteger taskCount = new AtomicInteger();
	static int[] fact, chkSums, maxFlips;

	static void firstPermutation(int[] p, int[] pp, int[] count, int idx) {
		for (int i = 0; i < p.length; ++i)
			p[i] = i;
		for (int i = count.length - 1; i > 0; --i) {
			int d = idx / fact[i];
			count[i] = d;
			if (d > 0) {
				idx = idx % fact[i];
				//noinspection ManualArrayCopy
				for (int j = i; j >= 0; --j)
					pp[j] = p[j];
				for (int j = 0; j <= i; ++j)
					p[j] = pp[(j + d) % (i + 1)]; // p[j] = j + d <= i ? pp[j + d] : pp[j + d - i - 1];
			}
		}
	}

	static int nextPermutation(int[] p, int[] count) {
		int first = p[1];
		p[1] = p[0];
		p[0] = first;
		int i = 1;
		while (++count[i] > i) {
			count[i++] = 0;
			int next = p[1];
			p[0] = next;
			for (int j = 1; j < i; )
				p[j] = p[++j];
			p[i] = first;
			first = next;
		}
		return first;
	}

	static int countFlips(int first, int[] p, int[] pp) {
		if (first == 0)
			return 0;
		if (p[first] == 0)
			return 1;
		//noinspection ManualArrayCopy
		for (int i = 0; i < pp.length; i++)
			pp[i] = p[i];
		int flips = 2;
		while (true) {
			for (int lo = 1, hi = first - 1; lo < hi; lo++, hi--) {
				int t = pp[lo];
				pp[lo] = pp[hi];
				pp[hi] = t;
			}
			int tp = pp[first];
			if (pp[tp] == 0)
				return flips;
			pp[first] = first;
			first = tp;
			flips++;
		}
	}

	static void run(int n, int taskSize) {
		int[] p = new int[n], pp = new int[n], count = new int[n];
		int taskId, chksum = 0, maxflips = 0;
		while ((taskId = taskCount.decrementAndGet()) >= 0) {
			firstPermutation(p, pp, count, taskId * taskSize);
			var flips = countFlips(p[0], p, pp);
			chksum += flips;
			if (flips > maxflips)
				maxflips = flips;
			for (int i = 1; i < taskSize; i++) {
				flips = countFlips(nextPermutation(p, count), p, pp);
				chksum += (1 - (i % 2) * 2) * flips;
				if (flips > maxflips)
					maxflips = flips;
			}
		}
		chkSums[-taskId - 1] = chksum;
		maxFlips[-taskId - 1] = maxflips;
	}

	public static void main(String[] args) throws Exception {
		int n = args.length > 0 ? Integer.parseInt(args[0]) : 12;
		fact = new int[n + 1];
		fact[0] = 1;
		var factn = 1;
		for (int i = 1; i < fact.length; i++)
			fact[i] = factn *= i;

		taskCount.set(n > 9 ? factn / (7 * 6 * 5 * 4 * 3 * 2) : Runtime.getRuntime().availableProcessors());
		int taskSize = factn / taskCount.get();
		int nThreads = Runtime.getRuntime().availableProcessors();
		chkSums = new int[nThreads];
		maxFlips = new int[nThreads];
		var threads = new Thread[nThreads];
		for (int i = 1; i < nThreads; i++)
			(threads[i] = new Thread(() -> run(n, taskSize))).start();
		run(n, taskSize);
		int chksum = chkSums[0], maxflips = maxFlips[0];
		for (int i = 1; i < threads.length; i++) {
			threads[i].join();
			chksum += chkSums[i];
			if (maxFlips[i] > maxflips)
				maxflips = maxFlips[i];
		}
		System.out.println(chksum + "\nPfannkuchen(" + n + ") = " + maxflips);
	}
}
